*************************************************************************************************
* Documentation for Reproduction of
* Don't Know Much about Democracy: Reporting Survey Data with Nonsubstantive Responses. 2012
* PS: Political Science & Politics 45(3): 462-467.
* Jonathan Reilly and L.J Zigerell
* Code by L.J Zigerell
* Stata version 11
*************************************************************************************************
*************************************************************************************************
* Session setup.
* The file header states that this code was written for Stata 11. Declaring the version
* asks later Stata releases to interpret the commands below the way Stata 11 did.
version 11
* Do not pause output with --more-- prompts.
set more off
* Request 500 MB of memory for the dataset.
* NOTE(review): believed to be needed only by older releases such as Stata 11 - confirm.
set mem 500m
* use "wvs1981_2008_v20090914.dta", clear [This is the command to load the dataset]
*************************************************************************************************
* Tabulate e117, the item reported in Figure 1 and Figure 2: first the default table,
* then including missing values, then including missing values with numeric codes shown.
tabulate e117
tabulate e117, missing
tabulate e117, nolabel missing

* "Left bound" indicator for e117:
*   1 = democracy would be "very good" or "fairly good" (codes 1 and 2)
*   0 = another substantive response (codes 3 and 4) or a non-substantive response (.)
* Any other value of e117 is carried over unchanged.
generate e117_lb = e117
replace e117_lb = 1 if inlist(e117, 1, 2)
replace e117_lb = 0 if inlist(e117, 3, 4) | e117 == .

* "Missing" indicator for e117:
*   1 = non-substantive response (.)
*   0 = substantive response (values from 1 through 4)
generate e117_mis = e117
replace e117_mis = 1 if e117 == .
replace e117_mis = 0 if inrange(e117, 1, 4)

* "Inglehart" indicator for e117:
*   1 = democracy would be "very good" or "fairly good" (codes 1 and 2)
*   0 = another substantive response (codes 3 and 4)
* Non-substantive responses stay missing and so drop out of means of this variable.
generate e117_ing = e117
replace e117_ing = 1 if inlist(e117, 1, 2)
replace e117_ing = 0 if inlist(e117, 3, 4)
*************************************************************************************************
*************************************************************************************************
* Percentages in Figure 1 and Figure 2 for the World Values Survey.
* Every survey needs the same table (mean and N of the three e117 indicators), so the
* command is wrapped in a small helper that takes the s003 country code and s020 year.
capture program drop wvs_e117_means
program define wvs_e117_means
    args country year
    tabstat e117_lb e117_mis e117_ing if s003==`country' & s020==`year', stats(mean, n)
end

wvs_e117_means 8 1998   // Albania 1998
wvs_e117_means 32 1999  // Argentina 1999
wvs_e117_means 51 1997  // Armenia 1997
wvs_e117_means 36 1995  // Australia 1995
wvs_e117_means 31 1997  // Azerbaijan 1997
wvs_e117_means 50 1996  // Bangladesh 1996
wvs_e117_means 70 1998  // Bosnia and Herzegovina 1998
wvs_e117_means 76 1997  // Brazil 1997
wvs_e117_means 124 2000 // Canada 2000
wvs_e117_means 152 2000 // Chile 2000
wvs_e117_means 156 2001 // China 2001
wvs_e117_means 170 1997 // Colombia 1997
wvs_e117_means 214 1996 // Dominican Republic 1996
wvs_e117_means 818 2000 // Egypt 2000
wvs_e117_means 222 1999 // El Salvador 1999
wvs_e117_means 268 1996 // Georgia 1996
wvs_e117_means 356 2001 // India 2001
wvs_e117_means 360 2001 // Indonesia 2001
wvs_e117_means 364 2000 // Iran 2000
wvs_e117_means 392 2000 // Japan 2000
wvs_e117_means 400 2001 // Jordan 2001
wvs_e117_means 807 1998 // Macedonia 1998
wvs_e117_means 484 2000 // Mexico 2000
wvs_e117_means 498 1996 // Moldova 1996
wvs_e117_means 504 2001 // Morocco 2001
wvs_e117_means 554 1998 // New Zealand 1998
wvs_e117_means 566 2000 // Nigeria 2000
wvs_e117_means 578 1996 // Norway 1996
wvs_e117_means 586 1997 // Pakistan 1997
wvs_e117_means 604 1996 // Peru 1996
wvs_e117_means 608 2001 // Philippines 2001
wvs_e117_means 630 2001 // Puerto Rico 2001
wvs_e117_means 891 2001 // Serbia and Montenegro 2001
wvs_e117_means 710 2001 // South Africa 2001
wvs_e117_means 410 2001 // South Korea 2001
wvs_e117_means 752 1999 // Sweden 1999
wvs_e117_means 756 1996 // Switzerland 1996
wvs_e117_means 158 1994 // Taiwan 1994
wvs_e117_means 834 2001 // Tanzania 2001
wvs_e117_means 792 2001 // Turkey 2001
wvs_e117_means 800 2001 // Uganda 2001
wvs_e117_means 840 1999 // United States 1999
wvs_e117_means 858 1996 // Uruguay 1996
wvs_e117_means 862 2000 // Venezuela 2000
wvs_e117_means 704 2001 // Vietnam 2001
wvs_e117_means 716 2001 // Zimbabwe 2001
*************************************************************************************************
*************************************************************************************************
* Everything from here on uses only the 2000 Iran Values Survey.
* use "wvs1981_2008_v20090914.dta", clear [This is the command to load the dataset]

* Check that country code 364 (Iran) and survey year 2000 occur in the data.
tabulate s003 if s003 == 364
tabulate s020 if s020 == 2000
* Discard every observation that is not an Iran 2000 case.
drop if !(s003 == 364 & s020 == 2000)
* Confirm that only Iran 2000 cases remain.
tabulate s003
tabulate s020

* Basic statistics for item e117 in the Iran 2000 sample.
* Each display divides a hand-entered count by 2532; the counts were presumably read off
* the tabulation printed just above each one.
tabulate e117, missing
* codes 1 and 2
tabulate e117 if inlist(e117, 1, 2), missing
display 1397/2532
* codes 3 and 4
tabulate e117 if inlist(e117, 3, 4), missing
display 235/2532
* system-missing responses
tabulate e117 if e117 == ., missing
display 900/2532

* Dichotomous e117 excluding non-substantive responses:
*   1 = "very good" or "fairly good" (codes 1 and 2), 0 = codes 3 and 4, missing stays missing
tabulate e117, missing
tabulate e117, missing nolabel
generate e117ing = e117
replace e117ing = 1 if inlist(e117, 1, 2)
replace e117ing = 0 if inlist(e117, 3, 4)
tabulate e117
tabulate e117ing, missing

* Dichotomous e117 including non-substantive responses:
*   1 = "very good" or "fairly good" (codes 1 and 2), 0 = codes 3 and 4 or system missing
tabulate e117, missing
tabulate e117, missing nolabel
generate e117lb = e117
replace e117lb = 1 if inlist(e117, 1, 2)
replace e117lb = 0 if inlist(e117, 3, 4) | e117 == .
tabulate e117lb, missing

* Indicator for a non-substantive response to e117:
*   1 = system missing, 0 = any value from 1 through 4
tabulate e117, missing
tabulate e117, missing nolabel
generate e117mis = e117
replace e117mis = 1 if e117 == .
replace e117mis = 0 if inrange(e117, 1, 4)
tabulate e117mis, missing

* Demographic covariates: female, age, and education.

* female is x001 shifted down by one.
* NOTE(review): this assumes x001 is coded 1 = male, 2 = female - confirm in the codebook.
tabulate x001, missing
generate female = x001 - 1
tabulate female

* age is a copy of x003
summarize x003
generate age = x003
summarize age

* education is a copy of x025
tabulate x025, missing
generate education = x025
tabulate education

* Relate non-substantive responses to gender, age, and education.
* these data are reported on page 463 column 1
* Gender is dichotomous, so it is tabulated by group instead of being correlated.
pwcorr e117mis age education, obs sig
foreach group in 1 0 {
    tabulate e117mis if female == `group', missing
}
* hand-entered counts, presumably the two group totals from the tables above
display 1171+1361

* Predicted probabilities of democratic support, excluding non-substantive responses.
* these data are reported on page 463 column 2
* estsimp, setx, and simqi are user-written commands (presumably the Clarify package).
* estsimp draws simulated coefficients at random. No seed was set originally, so the simqi
* output changed on every run; fixing the seed makes this section repeatable. Results can
* still differ from the published figures by simulation error.
set seed 20120701
estsimp logit e117ing female age education
* hold every covariate at its mean, then vary education alone
setx mean
tabulate education
forvalues level = 1/8 {
    setx education `level'
    simqi
}
* remove the simulated parameters (three slopes plus the constant) so that the next
* estsimp call can create b1-b4 again
drop b1-b4

* Predicted probabilities of democratic support, counting non-substantive responses as 0.
* these data are reported on page 463 column 2
* estsimp draws simulated coefficients at random. No seed was set originally, so the simqi
* output changed on every run; fixing the seed makes this section repeatable. Results can
* still differ from the published figures by simulation error.
set seed 20120702
estsimp logit e117lb female age education
* hold every covariate at its mean, then vary education alone
setx mean
forvalues level = 1/8 {
    setx education `level'
    simqi
}
* remove the simulated parameters (three slopes plus the constant) so that the next
* estsimp call can create b1-b4 again
drop b1-b4

* Re-estimate the model with mlogit on a three-value version of e117:
*   1 = codes 1 and 2, -1 = codes 3 and 4, 0 = non-substantive response (system missing)
gen e117_3 = e117
recode e117_3 (1 2=1) (3 4=-1) (.=0)
tab e117_3

* Ordered logit followed by brant (a user-written command, presumably from SPost).
ologit e117_3 female age education
brant

* estsimp draws simulated coefficients at random. No seed was set originally, so the simqi
* output changed on every run; fixing the seed makes this section repeatable.
set seed 20120703
estsimp mlogit e117_3 female age education
* hold every covariate at its mean, then vary education alone
setx mean
forvalues level = 1/8 {
    setx education `level'
    simqi
}
* A three-outcome mlogit has two equations of four parameters each, so estsimp creates
* b1 through b8. The original dropped only b1-b4 and left b5-b8 in the dataset.
drop b1-b8

* prtest for difference between e117 missings by gender
* Two-group test of proportions: compares the share of non-substantive e117 responses
* (e117mis == 1) between the female == 0 and female == 1 groups.
prtest e117mis, by(female)

* Inverse probability weighting with basic demographics only.
* these data are reported on page 464 column 2

* e117notmis: 1 = substantive response (values 1 through 4), 0 = system missing
generate e117notmis = e117
replace e117notmis = 0 if e117 == .
replace e117notmis = 1 if inrange(e117, 1, 4)

* model the probability of giving a substantive response
logit e117notmis female age education
predict pi1
summarize pi1
summarize pi1 if female == 1 & age == 65 & education == 1

* weight = inverse of the predicted response probability
generate ipw1 = 1/pi1
summarize ipw1
summarize ipw1 if e117notmis == 1
summarize ipw1 if e117notmis == 0
summarize ipw1 if female == 1 & age == 65 & education == 1

* weighted mean of e117ing, followed by the unweighted summary for comparison
svyset [pw=ipw1]
svy: mean e117ing
summarize e117ing
drop pi1 ipw1

* Inverse probability weighting with basic demographics plus responses to other
* democracy items.
* these data are reported on page 465 column 1

* Build a three-value version of each item (suffix _3):
*   1 = codes 1 and 2, -1 = codes 3 and 4, 0 = system missing
foreach item in e114 e115 e116 e123 {
    tabulate `item', missing
    generate `item'_3 = `item'
    recode `item'_3 (1 2=1) (3 4=-1) (.=0)
    tabulate `item'_3, missing
}

* model the probability of a substantive e117 response
logit e117notmis female age education e114_3 e115_3 e116_3 e123_3
predict pi2
* weight = inverse of the predicted response probability
generate ipw2 = 1/pi2
summarize ipw2 if e117notmis == 1
summarize ipw2 if e117notmis == 0

* weighted mean of e117ing, followed by the unweighted summary for comparison
svyset [pw=ipw2]
svy: mean e117ing
summarize e117ing
summarize ipw2
drop pi2 ipw2

* Percentage of mixed democrats.
* these data are reported on page 466 column 1
* The displayed ratios use hand-entered counts, presumably cell counts read off the
* cross-tabulations printed in this section.
tabulate e116 e117, missing
display (444+191+225+213)/(444+191+76+64+75+225+213+53+20+36)
display (444+191+225+213)/2532
tabulate e117 if e117lb == 1
tabulate e116 if e117lb == 1, missing
display (669+404)/1397

* heckman selection model
* these data are reported on page 465 column 2
* This first step simulates the logit of e117lb at the covariate means.
* estsimp draws simulated coefficients at random. No seed was set originally, so the simqi
* output changed on every run; fixing the seed makes this section repeatable.
set seed 20120704
estsimp logit e117lb female age education
setx mean
simqi
* Logistic probability computed by hand from hard-coded coefficients.
* NOTE(review): the coefficients are presumably rounded estimates from the logit above -
* confirm against the estimation output.
gen test = 1/(1+exp(-(-0.966-0.41*female+0.012*age+0.21*education)))
sum test
pwcorr test e117lb
* remove the simulated parameters (three slopes plus the constant)
drop b1-b4

* Probit with sample selection. The outcome equation uses demographics and the
* three-value democracy items; the selection equation adds e023.
tabulate e023, missing

tabulate e117ing, missing
local covars female age education e114_3 e115_3 e116_3 e123_3
heckprob e117ing `covars', select(`covars' e023) robust

* Predict the four joint probabilities, summarize them, then remove them again.
foreach joint in p11 p01 p10 p00 {
    predict `joint', `joint'
}
summarize p11 p01 p10 p00
drop p11 p01 p10 p00

* The display lines use hand-entered figures, presumably copied from output in this section.
display 57.37+24.51
margins
margins, atmeans
display 9.02+9.10
display (24.51)/(24.51+9.10)
* margins restricted to cases with a system-missing e117ing
margins if e117ing==.
display (1397+0.56*900)/2532
display (1397+0.79*900)/2532
display (1397+1.00*900)/2532
display (1397+1.02*900)/2532

* Logistic probability built from hard-coded coefficients.
* NOTE(review): the coefficients are presumably taken from a fitted model - confirm the source.
* The linear index lives in a local macro, so it is substituted as text into the formula.
local index -0.119 - 0.177*female + 0.0155*age + 0.059*education - 0.0276*e114_3 - 0.089*e115_3 + 0.306*e116_3 + 0.549*e123_3
generate e117imp = 1/(1+exp(-(`index')))

* Summaries overall, by observed e117ing, and on either side of a 0.50 cut-off.
summarize e117imp
summarize e117imp if e117ing == 1
summarize e117imp if e117ing == 0
summarize e117imp if e117imp >= 0.50
summarize e117imp if e117imp <  0.50
* hand-entered counts, presumably from the two cut-off summaries above
display 2338/(2338+86)
pwcorr e117imp e117ing

* Probit with sample selection and a longer covariate list. The selection equation
* repeats the outcome covariates and adds a062, e023, and a004.
local outcome_x female age education e114_3 e115_3 e116_3 e123_3 c006 e069_11 e069_01 f028
heckprob e117ing `outcome_x', select(`outcome_x' a062 e023 a004) robust

* Predict the four joint probabilities, summarize them, then remove them again.
foreach joint in p11 p01 p10 p00 {
    predict `joint', `joint'
}
summarize p11 p01 p10 p00
drop p11 p01 p10 p00

* The display lines use hand-entered figures, presumably copied from output in this section.
display 59.67+28.74
display 28.74/(28.74+0.96)
margins
margins, atmeans
* margins restricted to cases with a system-missing e117ing
margins if e117ing==.
display (1397+0.816*900)/2532
display (1397+0.870*900)/2532
display (1397+0.923*900)/2532

* multiple impute
* these data are reported on page 465 column 1
* Three imputation models for e117ing with increasingly long predictor lists; each one is
* followed by the mi estimate of the mean of e117ing.
* Model 1: predictors are female, age, and education.
mi set mlong
mi register imputed e117ing
mi register regular female age education
mi impute mvn e117ing = female age education, replace add(3) rseed(123) force
mi estimate: mean e117ing

* Model 2: adds the three-value democracy items e114_3, e115_3, e116_3, and e123_3.
* NOTE(review): the data are already mi set at this point; check whether a repeated
* "mi set mlong" is accepted or stops the do-file with an error.
* NOTE(review): each impute call combines replace with add(3); confirm whether imputations
* from the earlier model are kept and how many imputations mi estimate then uses.
mi set mlong
mi unregister e117ing
mi register imputed e117ing
mi register regular female age education e114_3 e115_3 e116_3 e123_3
mi impute mvn e117ing = female age education e114_3 e115_3 e116_3 e123_3, replace add(3) rseed(123) force
mi estimate: mean e117ing

* Model 3: adds e069_11, e111, e120, e121, e122, c006, and f028 to the predictors.
mi set mlong
mi unregister e117ing
mi register imputed e117ing
mi register regular female age education e114_3 e115_3 e116_3 e123_3 e069_11 e111 e120 e121 e122 c006 f028
mi impute mvn e117ing = female age education e114_3 e115_3 e116_3 e123_3 e069_11 e111 e120 e121 e122 c006 f028, add(3) force rseed(123) replace
mi estimate: mean e117ing
*************************************************************************************************
* See the [2 EVS Reilly Zigerell 2012 Don't Know Much about Democracy] Stata do file 
* for commands to calculate Figure 2 percentages for countries in the European Values Study
